## Sourobh Ghosh, Casey Kearney, Mahdi Hashemian
## Gov 2001 Replication Project

rm(list = ls()) ## clearing workspace

require(readstata13)

setwd("C:/Documents CLK/Grad School/Spring 2016/Gov 2001/Replication Paper/Data/Data")  ## modify this as necessary

data = read.dta13("SovietCollaboration_MainDataset.dta")
data.Table5 = read.dta13("SovietCollaboration_CitationToSovietArtDataset.dta")  ## for Table 5
data.spec = read.dta13("SovietCollaboration_SpecializationDataset.dta") ## for Fig 7, Table 6, Table A8-A9

############### Table 4 #################

dropsov = data[-which(data$sovietauthor == 1), ] ## dropping soviet authors
japanese <- dropsov[which(dropsov$japanesepub == 1),] ## keeping only japanese publications
nrow(japanese)

final.japanese <- japanese[c(which(japanese$newbdrankingposition <= 3),
                             which(japanese$newbdrankingposition > 30)),] ## keeping top and bottom 3 subfields
nrow(final.japanese) ## Observations N = 5096, matching the value reported in the paper

##### robust cluster SEs function (http://www.r-bloggers.com/easy-clustered-standard-errors-in-r/)

get_CL_vcov<-function(model, cluster){
  require(sandwich)
  require(lmtest)
  
  #calculate degree of freedom adjustment
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  
  #calculate the uj's
  uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
  
  #use sandwich to get the var-covar matrix
  vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
  return(vcovCL)
}

## All Japanese Journals
japaneset3b3 <- subset(final.japanese, newbdrankingposition<=3 | newbdrankingposition>=31, 
                  select=c(logauthourcount, newbdrankingposition, afterdummy, adjustedmsc, year,
                           japaneserankedjournal))


## creating dummy for topsoviet
topsovietdummy = rep(0,nrow(japaneset3b3))
for (i in 1:nrow(japaneset3b3)){  if (japaneset3b3$newbdrankingposition[i] < 4){topsovietdummy[i] = 1} }

japaneset3b3 = cbind(japaneset3b3,topsovietdummy) ## put in one data frame
fit <- lm(logauthourcount ~ topsovietdummy:afterdummy
         + factor(adjustedmsc) + factor(year), data = japaneset3b3)  ## OLS with fixed effects

## call robust cluster SE function and save the var-cov matrix output in an object
m1.vcovCL <- get_CL_vcov(fit, japaneset3b3$adjustedmsc)

## results
coeftest(fit, m1.vcovCL)  ## coefficient on AfterIronCurtain x SovietRich and corresponding robust cluster SE
summary(fit)  ## Multiple R-squared: 0.07627, matching report in paper. Note they use non-adjusted R^2 here


######## Ranked Japanese journals
ranked.japanese <- final.japanese[which(final.japanese$japaneserankedjournal == 1),]
nrow(ranked.japanese) ## Observations N = 3859, matching those reported in the paper

rankedt3b3 <- subset(ranked.japanese, newbdrankingposition<=3 | newbdrankingposition>=31, 
                  select=c(logauthourcount, newbdrankingposition, afterdummy, adjustedmsc, year,
                           japaneserankedjournal))

ranked.topsovietdummy = rep(0,nrow(rankedt3b3))
for (i in 1:nrow(rankedt3b3)){  if (rankedt3b3$newbdrankingposition[i] < 4){ranked.topsovietdummy[i] = 1} }

rankedt3b3 = cbind(rankedt3b3,ranked.topsovietdummy)

fit2 <- lm(logauthourcount ~ ranked.topsovietdummy:afterdummy
          + factor(adjustedmsc) + factor(year), data = rankedt3b3)## put in one data frame

## call robust cluster SE function and save the var-cov matrix output in an object
m2.vcovCL <- get_CL_vcov(fit2, rankedt3b3$adjustedmsc)

## results
coeftest(fit2, m2.vcovCL)  ## coefficient on AfterIronCurtain x SovietRich and corresponding robust cluster SE
summary(fit2)  ## R-squared of .0927 matching the value reported in table 4


## Unranked Japanese journals
unranked.japanese <- final.japanese[-which(final.japanese$japaneserankedjournal == 1),]
nrow(unranked.japanese) ## Observations N = 1237, matching report in paper

unrankedt3b3 <- subset(unranked.japanese, newbdrankingposition<=3 | newbdrankingposition>=31, 
                     select=c(logauthourcount, newbdrankingposition, afterdummy, adjustedmsc, year,
                              japaneserankedjournal))

unranked.topsovietdummy = rep(0,nrow(unrankedt3b3))
for (i in 1:nrow(unrankedt3b3)){  if (unrankedt3b3$newbdrankingposition[i] < 4){unranked.topsovietdummy[i] = 1} }

unrankedt3b3 = cbind(unrankedt3b3,unranked.topsovietdummy) ## put in one data frame

fit3 <- lm(logauthourcount ~ unranked.topsovietdummy:afterdummy
           + factor(adjustedmsc) + factor(year), data = unrankedt3b3)

## call robust cluster SE function and save the var-cov matrix output in an object
m3.vcovCL <- get_CL_vcov(fit3, unrankedt3b3$adjustedmsc)

## results
coeftest(fit3, m3.vcovCL)  ## coefficient on AfterIronCurtain x SovietRich and corresponding robust cluster SE
summary(fit3)  ## R^squared of .1007, matching what is reported in Table 4 

## Merging tables

require(stargazer)
stargazer(fit)
